# Path to the monthly SST data, resolved relative to the project root.
datafile <- here::here("data", "SEAS-monthly-sst.RData")
# load() restores the objects saved in the file into the current environment.
# NOTE(review): the code further down reads an object named `out`, presumably
# restored by this call -- confirm.
load(datafile)
Packages
library(factoextra)
Set up the data and data features.
# Unpack the components of `out` into top-level variables.
# NOTE(review): `out` is presumably the object restored by the load() call
# earlier in the file -- confirm.
#
# `[[` is used instead of `$` on purpose: `$` partial-matches list names, so
# `out$dat` would silently return the `dat.clean` element if `dat` were absent
# and `dat.clean` were the only name starting with "dat". `[[` matches names
# exactly and yields NULL for a missing element instead.
Data_clean <- out[["dat.clean"]]
Data_dirty <- out[["dat"]]
pos_loc <- out[["pos.loc"]]
Data cleaning and metadata. Remove the lat/lon rows and convert from Kelvin to Celsius.
# The first two rows of the raw data hold the coordinates: row 1 is read as
# latitudes, row 2 as longitudes.
lats <- Data_dirty[1, ]
lons <- Data_dirty[2, ]
# Number of entries at the smallest longitude and at the smallest latitude
# (table() orders its cells by value), in lon (x), lat (y) order.
asp <- c(table(lons)[1], table(lats)[1])
# Bounding box: lon min, lon max, lat min, lat max.
bb.box <- c(range(lons), range(lats))
Create matrices for kmeans()
# Drop the first two rows (presumably the lat/lon rows, as in Data_dirty --
# confirm) and subtract 273.15, the Kelvin-to-Celsius offset.
X <- Data_clean[-(1:2), ] - 273.15
# Centre every row of X on its own mean without rescaling. scale() operates
# on columns, hence the transpose going in and coming back out.
X_norm <- t(scale(t(X), center = TRUE, scale = FALSE))
Set variables for kmeans(). For the pilot analysis, set K = 12: something relatively big, to try to capture the variability.
# Settings that share their names with the iter.max and nstart arguments of
# kmeans().
# NOTE(review): nothing in the visible part of the file reads these directly;
# presumably kheatmap() picks them up from the global environment -- confirm.
# Presumably the cap on iterations per kmeans() run.
iter.max <- 25
# Presumably the number of random starting configurations tried by kmeans().
nstart <- 100
Function for the analyses.
# Call kheatmap() on the centred matrix X_norm and the raw data Data_dirty for
# a series of first-argument values from 4 to 30, storing each result in its
# own p<value> variable. kheatmap() is not defined in this part of the file.
# NOTE(review): the first argument is presumably the number of clusters K
# handed to kmeans(), and main.n presumably affects how the plot is laid out
# or titled -- confirm against the kheatmap() definition.
# main.n is passed explicitly only for 4 (main.n=4) and for 25 and 30
# (main.n=6); every other call omits it.
p4 <- kheatmap(4, X_norm, Data_dirty, main.n=4)
p6 <- kheatmap(6, X_norm, Data_dirty)
p8 <- kheatmap(8, X_norm, Data_dirty)
p10 <- kheatmap(10, X_norm, Data_dirty)
p12 <- kheatmap(12, X_norm, Data_dirty)
p14 <- kheatmap(14, X_norm, Data_dirty)
p16 <- kheatmap(16, X_norm, Data_dirty)
p18 <- kheatmap(18, X_norm, Data_dirty)
p20 <- kheatmap(20, X_norm, Data_dirty)
p22 <- kheatmap(22, X_norm, Data_dirty)
p25 <- kheatmap(25, X_norm, Data_dirty, main.n=6)
p30 <- kheatmap(30, X_norm, Data_dirty, main.n=6)
It appears that at least K=14 is necessary to capture the switches in pattern from May to June to July. The red line is June.